## imputation of raw data ##
## set the working directory: change this path to the folder on your machine ##
## that holds the replication files (the .dta files below are read and written ##
## relative to it) ##
	setwd(dir = path.expand('~/Dropbox/Police_Underreporting/data/fullLengthReplication/replication'))

## load packages and data ##
	library(Amelia)
	library(haven)

	## read the two raw Stata files from the working directory ##
	old <- read_dta(file = 'rawUcrOld.dta')
	new <- read_dta(file = 'rawUcrNew.dta')
	## quick look at each variable before anything is recoded ##
	summary(object = new)
	summary(object = old)

## in the old data, the capacity variable has no observed values for the years ##
## before the Bowen and Greene data begin (1960-1973); those rows simply carry ##
## the 1974 value ##
## keeping the carried-over values gives results similar to those reported in ##
## the article, but to err on the side of caution we impute them instead ##

## so, blank out the capacity variable for every year before 1974 ##
	old$faOrd <- replace(old$faOrd, old$year < 1974, NA)

## now there is a second problem ##
## population counts of 0 conflate 'true' zeros with missingness: ##
	## 1) 'true' zero = agency is not tied to a particular population (common in highway patrol) ##
	## 2) missing data ##
## an agency (ori) whose observed population is 0 in every year is a 'true' (universal) zero ##
## any other 0 is an 'untrue' zero and is set to NA so that it gets imputed ##

	## flagTrueZero: returns 1 for rows of agencies whose non-missing population ##
	## values are all 0, and 0 for rows of agencies with at least one non-zero value ##
	##   population: vector of population counts ##
	##   ori:        agency identifier, same length as population ##
	## missing population values are ignored when classifying an agency; taking the ##
	## agency mean instead would turn NA for any agency with a missing value and ##
	## leave that agency's 0s untouched ##
	flagTrueZero <- function(population, ori) {
		hasNonZero <- ave(as.numeric(population != 0), ori,
			FUN = function(nonZero) as.numeric(any(nonZero == 1, na.rm = TRUE)))
		ifelse(hasNonZero == 0, 1, 0)
	}

	new$trueZero <- flagTrueZero(new$population, new$ori)
	old$trueZero <- flagTrueZero(old$population, old$ori)

	## which() drops comparisons that evaluate to NA, so only definite 'untrue' 0s are recoded ##
	new$population[which(new$population == 0 & new$trueZero == 0)] <- NA
	old$population[which(old$population == 0 & old$trueZero == 0)] <- NA

## the trueZero flag has done its job: keep every column except it, so that ##
## only the necessary variables go into the imputation ##
	old <- old[, !(names(old) %in% 'trueZero')]
	new <- new[, !(names(new) %in% 'trueZero')]

## impute the missing values with Amelia (5 imputed data sets per file) ##
## the data read by haven are first converted to plain data frames ##
	old <- data.frame(old)
	new <- data.frame(new)

## fix the seed so the imputations are reproducible (R reads 0624 as 624); ##
## the old data must be imputed before the new data to keep the same random draws ##
	set.seed(seed = 0624)
	oldImp <- amelia(x = old, m = 5, ts = 'year', cs = 'ori', idvars = c('state', 'type'))
	newImp <- amelia(x = new, m = 5, ts = 'year', cs = 'ori', idvars = c('state', 'type'))

## the analyses in the article use only the first of the five imputed data sets, ##
## so that is the one saved here; these files replicate the article analyses ##
	write_dta(data = newImp[['imputations']][[1L]], path = 'ucrImpNew.dta')
	write_dta(data = oldImp[['imputations']][[1L]], path = 'ucrImpOld.dta')